If we group certain PS’, do we see an interesting pattern of when these genes are more expressed? PS’ are grouped as follows
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(myTAI)
library(see)
# Non-transformed
Ec_PES_32m <-
readr::read_csv(file = "data/Ec_PES_32m.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f <-
readr::read_csv(file = "data/Ec_PES_25f.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# sqrt-tranformed
Ec_PES_32m.sqrt <-
readr::read_csv(file = "data/Ec_PES_32m.sqrt.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.sqrt <-
readr::read_csv(file = "data/Ec_PES_25f.sqrt.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# log2-tranformed
Ec_PES_32m.log2 <-
readr::read_csv(file = "data/Ec_PES_32m.log2.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.log2 <-
readr::read_csv(file = "data/Ec_PES_25f.log2.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# rank-tranformed
Ec_PES_32m.rank <-
readr::read_csv(file = "data/Ec_PES_32m.rank.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.rank <-
readr::read_csv(file = "data/Ec_PES_25f.rank.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# rlog-tranformed
Ec_PES_32m.rlog <-
readr::read_csv(file = "data/Ec_PES_32m.rlog.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.rlog <-
readr::read_csv(file = "data/Ec_PES_25f.rlog.csv")
## Rows: 11571 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
group_PS <- function(PES, Groups = list(Ancient = c(1:2),
Early = c(3:6),
Phaeophyceae = 7,
Recent = c(8:11))){
if(!myTAI::is.ExpressionSet(PES)) stop("PES is not an ExpressionSet")
if(length(Groups) != 4) stop("Rewrite the function or have just four groups")
PES_new <- PES %>%
mutate(Groups = case_when(
Phylostratum %in% Groups[[1]] ~ names(Groups[1]),
Phylostratum %in% Groups[[2]] ~ names(Groups[2]),
Phylostratum %in% Groups[[3]] ~ names(Groups[3]),
Phylostratum %in% Groups[[4]] ~ names(Groups[4]),
TRUE ~ NA_character_
))
return(PES_new)
}
plot_PGroups <- function(grouped_PES, ordered_stages){
# pivot the dataframe longer for ggplot2
grouped_PES_filt <- grouped_PES %>%
dplyr::select(-Phylostratum) %>%
tidyr::pivot_longer(!c(GeneID, Groups), names_to = "Stage", values_to = "Abundance")
# make stages factors
grouped_PES_filt$Stage <- base::factor(grouped_PES_filt$Stage, ordered_stages)
# plot results
grouped_PES_filt_plot <- grouped_PES_filt %>%
ggplot2::ggplot(aes(x = Stage, y = Abundance, colour = Groups)) +
ggplot2::geom_line(alpha = 0.05, aes(group = GeneID)) +
ggplot2::facet_grid(~Groups) +
# ggplot2::geom_boxplot(colour = "black") +
see::geom_violinhalf(colour = "black", width = 1.2) +
# ggplot2::stat_summary(fun = "median", colour = "black") +
# ggplot2::stat_summary(fun.data = "mean_cl_boot", colour = "black", width = .4) +
ggsci::scale_colour_npg() +
# ggplot2::scale_x_discrete(guide = ggplot2::guide_axis(n.dodge=2)) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position = "none",
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
return(grouped_PES_filt_plot)
}
ordered_stages = c("meiospore", "immGA", "matGA", "oldGA", "gamete", "earlyPSP","immPSP", "matPSP", "mitospore")
# log2(x+1)
Ec_PES_32m.log2 %>%
dplyr::select(1,2,ends_with("GA")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [GA]", y = "log2(TPM+1)")
Ec_PES_32m.log2 %>%
dplyr::select(1,2,ends_with("PSP")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [pSP]", y = "log2(TPM+1)")
Ec_PES_32m.log2 %>%
dplyr::select(1,2, contains("spore"), contains("gamete")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [unicell]", y = "log2(TPM+1)")
Ec_PES_25f.log2 %>%
dplyr::select(1,2,ends_with("GA")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [GA]", y = "log2(TPM+1)")
Ec_PES_25f.log2 %>%
dplyr::select(1,2,ends_with("PSP")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [pSP]", y = "log2(TPM+1)")
Ec_PES_25f.log2 %>%
dplyr::select(1,2, contains("spore"), contains("gamete")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [unicell]", y = "log2(TPM+1)")
# rlog
Ec_PES_32m.rlog %>%
dplyr::select(1,2,ends_with("GA")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [GA]", y = "rlog(TPM)")
Ec_PES_32m.rlog %>%
dplyr::select(1,2,ends_with("PSP")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [pSP]", y = "rlog(TPM)")
Ec_PES_32m.rlog %>%
dplyr::select(1,2, contains("spore"), contains("gamete")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [unicell]", y = "rlog(TPM)")
Ec_PES_25f.rlog %>%
dplyr::select(1,2,ends_with("GA")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [GA]", y = "rlog(TPM)")
Ec_PES_25f.rlog %>%
dplyr::select(1,2,ends_with("PSP")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [pSP]", y = "rlog(TPM)")
Ec_PES_25f.rlog %>%
dplyr::select(1,2, contains("spore"), contains("gamete")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [unicell]", y = "rlog(TPM)")
The results are simply not that interesting. Genes from all PS act the same way and have the same underlying distribution, except the most recent PS’ (8 to 11), which has more lowly expressed genes.
# # Non-transformed
# Ec_PES_32m_denoised <-
# readr::read_csv(file = "data/Ec_PES_32m_denoised.csv")
# Ec_PES_25f_denoised <-
# readr::read_csv(file = "data/Ec_PES_25f_denoised.csv")
#
# # sqrt-tranformed
# Ec_PES_32m.sqrt_denoised <-
# readr::read_csv(file = "data/Ec_PES_32m.sqrt_denoised.csv")
# Ec_PES_25f.sqrt_denoised <-
# readr::read_csv(file = "data/Ec_PES_25f.sqrt_denoised.csv")
# log2-tranformed
Ec_PES_32m.log2_denoised <-
readr::read_csv(file = "data/Ec_PES_32m.log2_denoised.csv")
## Rows: 2719 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Ec_PES_25f.log2_denoised <-
readr::read_csv(file = "data/Ec_PES_25f.log2_denoised.csv")
## Rows: 2719 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): GeneID
## dbl (10): Phylostratum, meiospore, immGA, matGA, oldGA, gamete, earlyPSP, im...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ordered_stages = c("meiospore", "immGA", "matGA", "oldGA", "gamete", "earlyPSP","immPSP", "matPSP", "mitospore")
# log2(x+1)
Ec_PES_32m.log2_denoised %>%
dplyr::select(1,2,ends_with("GA")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [GA] denoised", y = "log2(TPM+1)")
Ec_PES_32m.log2_denoised %>%
dplyr::select(1,2,ends_with("PSP")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [pSP] denoised", y = "log2(TPM+1)")
Ec_PES_32m.log2_denoised %>%
dplyr::select(1,2, contains("spore"), contains("gamete")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec32m [unicell] denoised", y = "log2(TPM+1)")
Ec_PES_25f.log2_denoised %>%
dplyr::select(1,2,ends_with("GA")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [GA] denoised", y = "log2(TPM+1)")
Ec_PES_25f.log2_denoised %>%
dplyr::select(1,2,ends_with("PSP")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [pSP] denoised", y = "log2(TPM+1)")
Ec_PES_25f.log2_denoised %>%
dplyr::select(1,2, contains("spore"), contains("gamete")) %>% group_PS() %>%
plot_PGroups(ordered_stages) + ggplot2::labs(title = "Ec25f [unicell] denoised", y = "log2(TPM+1)")
Get session info.
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.2.2 (2022-10-31)
## os macOS Big Sur ... 10.16
## system x86_64, darwin17.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz Europe/Berlin
## date 2023-10-24
## pandoc 3.1.6.2 @ /usr/local/bin/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## bit 4.0.5 2022-11-15 [1] CRAN (R 4.2.0)
## bit64 4.0.5 2020-08-30 [1] CRAN (R 4.2.0)
## bslib 0.5.1 2023-08-11 [1] CRAN (R 4.2.0)
## cachem 1.0.8 2023-05-01 [1] CRAN (R 4.2.0)
## callr 3.7.3 2022-11-02 [1] CRAN (R 4.2.0)
## cli 3.6.1 2023-03-23 [1] CRAN (R 4.2.0)
## codetools 0.2-19 2023-02-01 [1] CRAN (R 4.2.0)
## colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.2.0)
## crayon 1.5.2 2022-09-29 [1] CRAN (R 4.2.0)
## devtools 2.4.5 2022-10-11 [1] CRAN (R 4.2.0)
## digest 0.6.33 2023-07-07 [1] CRAN (R 4.2.0)
## dplyr * 1.1.3 2023-09-03 [1] CRAN (R 4.2.0)
## ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.0)
## evaluate 0.22 2023-09-29 [1] CRAN (R 4.2.2)
## fansi 1.0.5 2023-10-08 [1] CRAN (R 4.2.2)
## farver 2.1.1 2022-07-06 [1] CRAN (R 4.2.0)
## fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.2.0)
## forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.2.0)
## foreach 1.5.2 2022-02-02 [1] CRAN (R 4.2.0)
## fs 1.6.3 2023-07-20 [1] CRAN (R 4.2.0)
## generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.0)
## ggplot2 * 3.4.4 2023-10-12 [1] CRAN (R 4.2.2)
## ggsci 3.0.0 2023-03-08 [1] CRAN (R 4.2.0)
## glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0)
## gtable 0.3.4 2023-08-21 [1] CRAN (R 4.2.0)
## hms 1.1.3 2023-03-21 [1] CRAN (R 4.2.0)
## htmltools 0.5.6.1 2023-10-06 [1] CRAN (R 4.2.2)
## htmlwidgets 1.6.2 2023-03-17 [1] CRAN (R 4.2.0)
## httpuv 1.6.11 2023-05-11 [1] CRAN (R 4.2.2)
## insight 0.19.6 2023-10-12 [1] CRAN (R 4.2.2)
## iterators 1.0.14 2022-02-05 [1] CRAN (R 4.2.0)
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.2.0)
## jsonlite 1.8.7 2023-06-29 [1] CRAN (R 4.2.0)
## knitr 1.44 2023-09-11 [1] CRAN (R 4.2.0)
## labeling 0.4.3 2023-08-29 [1] CRAN (R 4.2.0)
## later 1.3.1 2023-05-02 [1] CRAN (R 4.2.2)
## lattice 0.21-9 2023-10-01 [1] CRAN (R 4.2.2)
## lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.2.0)
## lubridate * 1.9.3 2023-09-27 [1] CRAN (R 4.2.0)
## magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0)
## Matrix 1.5-4.1 2023-05-18 [1] CRAN (R 4.2.0)
## memoise 2.0.1 2021-11-26 [1] CRAN (R 4.2.0)
## mime 0.12 2021-09-28 [1] CRAN (R 4.2.0)
## miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 4.2.0)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0)
## myTAI * 1.0.1.9000 2023-10-03 [1] Github (drostlab/myTAI@e159136)
## pillar 1.9.0 2023-03-22 [1] CRAN (R 4.2.0)
## pkgbuild 1.4.2 2023-06-26 [1] CRAN (R 4.2.0)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0)
## pkgload 1.3.3 2023-09-22 [1] CRAN (R 4.2.0)
## prettyunits 1.2.0 2023-09-24 [1] CRAN (R 4.2.0)
## processx 3.8.2 2023-06-30 [1] CRAN (R 4.2.0)
## profvis 0.3.8 2023-05-02 [1] CRAN (R 4.2.0)
## promises 1.2.1 2023-08-10 [1] CRAN (R 4.2.2)
## ps 1.7.5 2023-04-18 [1] CRAN (R 4.2.0)
## purrr * 1.0.2 2023-08-10 [1] CRAN (R 4.2.2)
## R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.0)
## Rcpp 1.0.11 2023-07-06 [1] CRAN (R 4.2.0)
## readr * 2.1.4 2023-02-10 [1] CRAN (R 4.2.0)
## remotes 2.4.2.1 2023-07-18 [1] CRAN (R 4.2.2)
## rlang 1.1.1 2023-04-28 [1] CRAN (R 4.2.0)
## rmarkdown 2.25 2023-09-18 [1] CRAN (R 4.2.2)
## rstudioapi 0.15.0 2023-07-07 [1] CRAN (R 4.2.0)
## sass 0.4.7 2023-07-15 [1] CRAN (R 4.2.0)
## scales 1.2.1 2022-08-20 [1] CRAN (R 4.2.0)
## see * 0.8.0 2023-06-05 [1] CRAN (R 4.2.0)
## sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.0)
## shiny 1.7.5.1 2023-10-14 [1] CRAN (R 4.2.2)
## stringi 1.7.12 2023-01-11 [1] CRAN (R 4.2.0)
## stringr * 1.5.0 2022-12-02 [1] CRAN (R 4.2.0)
## tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.2.0)
## tidyr * 1.3.0 2023-01-24 [1] CRAN (R 4.2.0)
## tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.2.0)
## tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.2.0)
## timechange 0.2.0 2023-01-11 [1] CRAN (R 4.2.0)
## tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.2.2)
## urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.2.0)
## usethis 2.2.2 2023-07-06 [1] CRAN (R 4.2.0)
## utf8 1.2.3 2023-01-31 [1] CRAN (R 4.2.0)
## vctrs 0.6.4 2023-10-12 [1] CRAN (R 4.2.2)
## vroom 1.6.4 2023-10-02 [1] CRAN (R 4.2.2)
## withr 2.5.1 2023-09-26 [1] CRAN (R 4.2.0)
## xfun 0.40 2023-08-09 [1] CRAN (R 4.2.2)
## xtable 1.8-4 2019-04-21 [1] CRAN (R 4.2.0)
## yaml 2.3.7 2023-01-23 [1] CRAN (R 4.2.0)
##
## [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library
##
## ──────────────────────────────────────────────────────────────────────────────